import hashlib
import re
import sha
from datetime import datetime
from time import strptime

# strptime/strftime format for patch dates: a 14-digit timestamp made of
# year, month, day, hour, minute and second with no separators.
PATCH_DATE_FORMAT = '%Y%m%d%H%M%S'

# Verbose regular expression matching one patch entry of an inventory.
# The named groups (name, author, inverted, date, comment) line up with
# the keyword arguments accepted by ``Patch``.
patch_pattern = r"""
   \[                                   # Patch start indicator
   (?P<name>[^\n]+)\n                   # Patch name (rest of same line)
   (?P<author>[^\*]+)                   # Patch author
   \*                                   # Author/date separator 
   (?P<inverted>[-\*])                  # Inverted patch indicator
   (?P<date>\d{14})                     # Patch date
   (?:\n(?P<comment>(?:^\ [^\n]*\n)+))? # Optional long comment
   \]                                   # Patch end indicator
   """
# VERBOSE allows the whitespace and inline comments in the pattern above;
# MULTILINE lets the ``^`` inside the comment group match at the start of
# every comment line rather than only at the start of the input.
patch_re = re.compile(patch_pattern, re.VERBOSE | re.MULTILINE)
# Matches the single space at the start of each line; substituting it
# with '' removes the one-space prefix carried by long-comment lines.
tidy_comment_re = re.compile(r'^ ', re.MULTILINE)

class Patch(object):
    """
    Patch details, as defined in a darcs inventory file.

    Attribute names match those generated by the
    ``darcs changes --xml-output`` command.

    Derives from ``object`` because ``property`` (used for ``hash``) is
    only documented to work on new-style classes.
    """
    def __init__(self, name, author, date, inverted, comment=None):
        """
        Stores the patch details.

        ``date`` is a string in ``PATCH_DATE_FORMAT`` and is converted
        to a ``datetime``; a ``ValueError`` is raised if it does not
        match that format.  ``inverted`` is a boolean flag and
        ``comment`` is the optional long comment (``None`` if absent).
        """
        self.name = name
        self.author = author
        self.date = datetime.strptime(date, PATCH_DATE_FORMAT)
        self.inverted = inverted
        self.comment = comment

    def __str__(self):
        """
        Returns the patch name.
        """
        return self.name

    @staticmethod
    def _sha1_hex(text):
        """
        Returns the hexadecimal SHA-1 digest of ``text``.

        Byte strings are hashed as-is.  Unicode text is encoded first,
        since hash functions operate on bytes.
        """
        if not isinstance(text, bytes):
            # NOTE(review): UTF-8 is assumed here; confirm it matches the
            # encoding the inventory was decoded with.
            text = text.encode('utf-8')
        return hashlib.sha1(text).hexdigest()

    @property
    def hash(self):
        """
        Calculates the filename of the gzipped file containing patch
        contents in the repository's ``patches`` directory.

        This consists of the patch date, a partial SHA-1 hash of the
        patch author and a full SHA-1 hash of the complete patch
        details.
        """
        date_str = self.date.strftime(PATCH_DATE_FORMAT)
        if self.comment:
            # Comment lines are concatenated with no separator, each one
            # stripped of trailing whitespace.
            comment = ''.join(
                line.rstrip() for line in self.comment.split('\n'))
        else:
            comment = ''
        inverted_flag = 't' if self.inverted else 'f'
        complete_patch_details = '%s%s%s%s%s' % (
            self.name, self.author, date_str, comment, inverted_flag,
        )
        return '%s-%s-%s.gz' % (
            date_str,
            # Only the first five hex digits of the author hash are used.
            self._sha1_hex(self.author)[:5],
            self._sha1_hex(complete_patch_details),
        )

def parse_inventory(inventory):
    """
    Generator yielding one ``Patch`` for every patch entry found in
    ``inventory``, the text of a darcs inventory file.

    The inverted indicator is converted to a boolean (true when it is
    ``-``) and, when a long comment is present, the leading space is
    removed from each of its lines and the result is stripped of
    surrounding whitespace.
    """
    for found in patch_re.finditer(inventory):
        name, author, indicator, date, comment = found.group(
            'name', 'author', 'inverted', 'date', 'comment')
        if comment is not None:
            unprefixed = tidy_comment_re.sub('', comment)
            comment = unprefixed.strip()
        yield Patch(name=name, author=author, date=date,
                    inverted=(indicator == '-'), comment=comment)

if __name__ == '__main__':
    import urllib2
    inventory = urllib2.urlopen('http://darcs.net/_darcs/inventory').read()
    for patch in parse_inventory(inventory):
        print patch.__dict__
